Copy Tutorial to working directory

##{r} ##system("cp -rp /ihome/crc/training/fall2019/R_Introduction $HOME") ##

Set working directory to $HOME/R_Introduction

##{r} ##myhome <- paste0(system("echo $HOME", intern = TRUE),"/R_Introduction") ##

##{r} ##myhome ## ##{r setup} ##setwd(myhome) ##knitr::opts_knit$set(root.dir=myhome) ##

getwd()
## [1] "/Users/kimwong/OneDrive - University of Pittsburgh/Documents/Kim F. Wong/CRC_Workshop/2021/Shawn_Brown_Lecture/Learning_R_with_RStudio_F2021"

Output R version

R.version.string
## [1] "R version 4.0.3 (2020-10-10)"

Assignment, math, and printing

Syntax for assignment of variables

x <- 25
y <- 75
z <- x + y

Output value of variable

x; y; z
## [1] 25
## [1] 75
## [1] 100

Concatenate and output

cat(x,y,z)
## 25 75 100

Concatenate and output, ending with a newline

cat(x,y,z,"\n")
## 25 75 100

Combine values into a vector or a list

s <- c(x,y,z)
print(s)
## [1]  25  75 100

Formatting

sprintf("%i %i %i", x,y,z)
## [1] "25 75 100"

Formatting floating-point values to two decimal places

y <- 3.14823423
yy <- c(x,y,z)
sprintf("%1.2f", yy)
## [1] "25.00"  "3.15"   "100.00"
yy
## [1]  25.000000   3.148234 100.000000

Indexing in data structure

Syntax for a Vector

v <- c(2,4,6,8,10,12)
v
## [1]  2  4  6  8 10 12

Accessing specific elements of the vector

v[c(1,2,3,4,5,6)]
## [1]  2  4  6  8 10 12
v[c(1,2,3)]
## [1] 2 4 6
v[c(4,5,6)]
## [1]  8 10 12
v[3:5]
## [1]  6  8 10

Syntax for a Matrix

m <- matrix(c(1,2,3,4,5,6,7,8,9), nrow=3, ncol=3)
m
##      [,1] [,2] [,3]
## [1,]    1    4    7
## [2,]    2    5    8
## [3,]    3    6    9
matrix(c(1:5), nrow=2, ncol=5)
##      [,1] [,2] [,3] [,4] [,5]
## [1,]    1    3    5    2    4
## [2,]    2    4    1    3    5
# Fill the 2 x 5 matrix row by row; the five values are recycled to fill all
# ten cells. `byrow = TRUE` is spelled out instead of passing `T` positionally,
# because `T` is an ordinary variable that can be reassigned, unlike `TRUE`.
matrix(1:5, nrow = 2, ncol = 5, byrow = TRUE)
##      [,1] [,2] [,3] [,4] [,5]
## [1,]    1    2    3    4    5
## [2,]    1    2    3    4    5
m[3,2]
## [1] 6

Syntax for data frame

# Gradebook used by the examples that follow: one row per student, one column
# per graded item. `stringsAsFactors = FALSE` keeps `name` a character column
# on R versions before 4.0 as well (where strings became factors by default),
# which is what the `summary()` output in this document shows.
students <- data.frame(
  name = c("Jack", "Jill", "Emma", "Billy", "Sarah"),
  hw1 = c(87, 90, 100, 75, 88),
  hw2 = c(95, 65, 95, 85, 100),
  hw3 = c(99, 95, 89, 93, 87),
  quiz1 = c(45, 55, 65, 70, 75),
  quiz2 = c(95, 85, 75, 65, 55),
  final = c(100, 95, 90, 85, 80),
  stringsAsFactors = FALSE
)

Accessing specific element of a data frame

students[1,6]
## [1] 95

Accessing a row of a data frame

students[2,]

Accessing a range of rows and columns

students[2:3,c(1,5:7)]

Accessing columns by name or by position

students[ ,c("name", "hw2", "final")]
students[ ,c(1, 3, 7)]

Simple statistics on a data frame

Output students grades on the final

students[ , "final"]
## [1] 100  95  90  85  80

mean, min, max and standard deviation of grades on the final

mean(students[ , "final"])
## [1] 90
min(students[ , "final"])
## [1] 80
max(students[ , "final"])
## [1] 100
sd(students[ , "final"])
## [1] 7.905694

Summary statistics of grades on the final

summary(students[ , "final"])
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##      80      85      90      90      95     100

Summary statistics of specific data columns

summary(students[ , c("hw1", "hw2", "hw3")])
##       hw1           hw2           hw3      
##  Min.   : 75   Min.   : 65   Min.   :87.0  
##  1st Qu.: 87   1st Qu.: 85   1st Qu.:89.0  
##  Median : 88   Median : 95   Median :93.0  
##  Mean   : 88   Mean   : 88   Mean   :92.6  
##  3rd Qu.: 90   3rd Qu.: 95   3rd Qu.:95.0  
##  Max.   :100   Max.   :100   Max.   :99.0
summary(students[c(2:4), c("name","hw1", "hw2", "hw3")])
##      name                hw1              hw2             hw3       
##  Length:3           Min.   : 75.00   Min.   :65.00   Min.   :89.00  
##  Class :character   1st Qu.: 82.50   1st Qu.:75.00   1st Qu.:91.00  
##  Mode  :character   Median : 90.00   Median :85.00   Median :93.00  
##                     Mean   : 88.33   Mean   :81.67   Mean   :92.33  
##                     3rd Qu.: 95.00   3rd Qu.:90.00   3rd Qu.:94.00  
##                     Max.   :100.00   Max.   :95.00   Max.   :95.00

Summary statistics of all data

summary(students)
##      name                hw1           hw2           hw3           quiz1   
##  Length:5           Min.   : 75   Min.   : 65   Min.   :87.0   Min.   :45  
##  Class :character   1st Qu.: 87   1st Qu.: 85   1st Qu.:89.0   1st Qu.:55  
##  Mode  :character   Median : 88   Median : 95   Median :93.0   Median :65  
##                     Mean   : 88   Mean   : 88   Mean   :92.6   Mean   :62  
##                     3rd Qu.: 90   3rd Qu.: 95   3rd Qu.:95.0   3rd Qu.:70  
##                     Max.   :100   Max.   :100   Max.   :99.0   Max.   :75  
##      quiz2        final    
##  Min.   :55   Min.   : 80  
##  1st Qu.:65   1st Qu.: 85  
##  Median :75   Median : 90  
##  Mean   :75   Mean   : 90  
##  3rd Qu.:85   3rd Qu.: 95  
##  Max.   :95   Max.   :100
dim(students)
## [1] 5 7

Querying info about variables

Let’s assign some variables

x <- 2.5
n <- 8L
nn <- 8.0
lett <- LETTERS[1:8]

Display the structure of above variables

str(x)
##  num 2.5
str(n)
##  int 8
str(nn)
##  num 8
str(lett)
##  chr [1:8] "A" "B" "C" "D" "E" "F" "G" "H"

Query whether a variable has been declared, and whether it is an integer

exists("x")
## [1] TRUE
exists("x_does_not_exist")
## [1] FALSE
is.integer(n)
## [1] TRUE
nn <- 318
is.integer(nn)
## [1] FALSE
nnn <- as.integer(nn)
nnn
## [1] 318
is.integer(nnn)
## [1] TRUE
x_does_not_exist <- FALSE
exists("x_does_not_exist")
## [1] TRUE
str(x_does_not_exist)
##  logi FALSE

Let’s create a data frame and query its dimensions

n <- 1:8
data <- data.frame(n,lett)
str(data)
## 'data.frame':    8 obs. of  2 variables:
##  $ n   : int  1 2 3 4 5 6 7 8
##  $ lett: chr  "A" "B" "C" "D" ...
length(n)
## [1] 8
length(data)
## [1] 2
nrow(data)
## [1] 8
ncol(data)
## [1] 2
dim(data)
## [1] 8 2

Common Random number operations

runif(1)
## [1] 0.00721674
myran <- runif(1000)
myran
##    [1] 0.882419812 0.741365923 0.603597072 0.612533455 0.010330846 0.583233788
##    [7] 0.341416823 0.564887987 0.707573951 0.902942424 0.631729024 0.326899253
##   [13] 0.163070313 0.444410938 0.193529781 0.889036517 0.271844494 0.397308493
##   [19] 0.201090167 0.648575138 0.295638061 0.602520156 0.251120158 0.417896067
##   [25] 0.566277690 0.635875485 0.626076552 0.777399895 0.678963094 0.134060139
##   [31] 0.697923272 0.002657523 0.354500694 0.716764482 0.034451044 0.884037562
##   [37] 0.150103385 0.389286662 0.766915353 0.825232639 0.209179882 0.899093662
##   [43] 0.831457753 0.145272884 0.384928957 0.857409969 0.336989371 0.825792056
##   [49] 0.702208548 0.544430390 0.301095688 0.880660719 0.776885475 0.472327210
##   [55] 0.492543388 0.071209919 0.017521285 0.572428182 0.837380518 0.621518911
##   [61] 0.330947568 0.758759883 0.606044115 0.238695869 0.496651685 0.801383782
##   [67] 0.010874533 0.809556912 0.975871391 0.876368336 0.274202797 0.428375535
##   [73] 0.707119676 0.584524964 0.289686995 0.793351816 0.762705466 0.604024079
##   [79] 0.765505297 0.759352987 0.050865235 0.014111127 0.404179862 0.618805456
##   [85] 0.334013695 0.668936209 0.733308276 0.823860674 0.354222035 0.269431759
##   [91] 0.310770144 0.646098401 0.171036304 0.230177095 0.846869928 0.741315154
##   [97] 0.360100125 0.378234537 0.422208627 0.868291331 0.462257632 0.406004830
##  [103] 0.992880174 0.802417898 0.042590453 0.221803626 0.246426632 0.464873887
##  [109] 0.058948313 0.967527515 0.397678127 0.978240722 0.136318216 0.355719176
##  [115] 0.707091283 0.355660192 0.377578863 0.166840565 0.461922600 0.523278672
##  [121] 0.168338302 0.220860502 0.085531204 0.558235951 0.959799935 0.829763163
##  [127] 0.968280049 0.648745549 0.413847833 0.159974171 0.872435860 0.724458783
##  [133] 0.033597104 0.995087542 0.693492441 0.598112318 0.118953907 0.280896093
##  [139] 0.953301534 0.258282161 0.692604989 0.965147684 0.314357996 0.786201335
##  [145] 0.439534305 0.426493140 0.330543875 0.761972186 0.533338884 0.945889162
##  [151] 0.258743897 0.561307783 0.767697048 0.771922547 0.241483220 0.740715605
##  [157] 0.408208873 0.070026482 0.168704881 0.454567576 0.320580333 0.393170937
##  [163] 0.299011751 0.451221093 0.678366860 0.155192104 0.821206778 0.832478903
##  [169] 0.423859298 0.766284866 0.762163333 0.007605700 0.853321522 0.923365895
##  [175] 0.269620642 0.183287347 0.923800270 0.886775724 0.513206327 0.510765147
##  [181] 0.511945359 0.672290123 0.709573955 0.599412617 0.732453969 0.296531420
##  [187] 0.617901705 0.026942209 0.538181621 0.093903906 0.902293797 0.734895141
##  [193] 0.519911913 0.707194950 0.419377451 0.128201938 0.419132930 0.082307370
##  [199] 0.908853150 0.396491915 0.203685379 0.937395421 0.282231502 0.557588055
##  [205] 0.152617328 0.641994737 0.739790880 0.999757659 0.516155543 0.560961966
##  [211] 0.745748474 0.873445011 0.749058625 0.216563578 0.137274418 0.589853803
##  [217] 0.389683152 0.562708637 0.774825004 0.333371673 0.711350003 0.869082987
##  [223] 0.233406646 0.439564117 0.557340819 0.907916426 0.364764841 0.792880174
##  [229] 0.402967039 0.149162667 0.923557475 0.161308712 0.965969722 0.509909118
##  [235] 0.981091870 0.377996029 0.325024630 0.176689553 0.516093415 0.462015470
##  [241] 0.183567190 0.703787391 0.227813556 0.071607389 0.596544125 0.307860981
##  [247] 0.204014680 0.752036449 0.558570368 0.649647620 0.864512509 0.419567223
##  [253] 0.990556044 0.533943513 0.937633362 0.084165811 0.578922853 0.865381612
##  [259] 0.062047633 0.001311505 0.162517498 0.361159390 0.486089448 0.024684800
##  [265] 0.561050896 0.375778310 0.989062270 0.002821170 0.666948311 0.450898241
##  [271] 0.340886954 0.375050270 0.106855161 0.529503070 0.310416541 0.797431352
##  [277] 0.196456567 0.233615418 0.391421463 0.151550779 0.735905083 0.132603546
##  [283] 0.418215199 0.356854343 0.206912593 0.561543856 0.919717113 0.431848795
##  [289] 0.157185354 0.098036546 0.812146109 0.499143945 0.568817945 0.026709618
##  [295] 0.522441483 0.384147024 0.191940162 0.637212027 0.720377696 0.833204086
##  [301] 0.720507573 0.842532542 0.811337058 0.926315278 0.860912968 0.182443183
##  [307] 0.047561183 0.961340153 0.424123777 0.104110238 0.360643580 0.073552418
##  [313] 0.340733699 0.426325765 0.861372291 0.866543473 0.837673903 0.480313425
##  [319] 0.420469815 0.183727008 0.447727792 0.679839449 0.188482050 0.810211230
##  [325] 0.709548278 0.040964132 0.203092578 0.076149253 0.930253724 0.002019158
##  [331] 0.679667232 0.343400404 0.229195196 0.430074796 0.959923347 0.258216192
##  [337] 0.012395655 0.832152032 0.909063197 0.709178971 0.598633321 0.354411485
##  [343] 0.750329947 0.523587351 0.993062651 0.291112623 0.974630362 0.953153718
##  [349] 0.366593565 0.579100975 0.940004236 0.776891558 0.779940113 0.406109314
##  [355] 0.142843721 0.526082673 0.996292451 0.669634171 0.669170032 0.242430832
##  [361] 0.077466733 0.823777839 0.046886677 0.835517573 0.142074928 0.864304380
##  [367] 0.792900552 0.554829369 0.698147102 0.465731889 0.204414920 0.795830813
##  [373] 0.747150148 0.067073798 0.842640071 0.427467154 0.803092296 0.392071058
##  [379] 0.568724020 0.044047196 0.418153245 0.648162079 0.245846338 0.640006154
##  [385] 0.538947255 0.155587956 0.060881828 0.895656678 0.759927652 0.754496624
##  [391] 0.491351804 0.383318545 0.534160326 0.196896191 0.281600704 0.882507380
##  [397] 0.628533761 0.180126353 0.946690807 0.597741592 0.817748943 0.626856490
##  [403] 0.932379181 0.037575370 0.792215147 0.555839301 0.249797598 0.147303641
##  [409] 0.160230377 0.136430452 0.489573434 0.863016026 0.307476457 0.537380403
##  [415] 0.407277593 0.479821341 0.438897898 0.545125114 0.552590694 0.030494144
##  [421] 0.044582999 0.079585868 0.320372370 0.383467960 0.573844050 0.104266367
##  [427] 0.624611632 0.005322131 0.872892021 0.321224074 0.266074139 0.587099567
##  [433] 0.828331099 0.100588180 0.886793509 0.805958714 0.687730259 0.803116301
##  [439] 0.744398743 0.536335179 0.778652374 0.029812173 0.064115667 0.428095432
##  [445] 0.922611416 0.758645174 0.677054819 0.135861152 0.548853211 0.542476706
##  [451] 0.588774046 0.785162067 0.218936809 0.973422616 0.620943949 0.746389210
##  [457] 0.614923568 0.458217490 0.689758558 0.209833522 0.896794104 0.798883764
##  [463] 0.969647767 0.566147475 0.267010164 0.172570768 0.137471177 0.559054713
##  [469] 0.195359309 0.921699193 0.135145196 0.747084729 0.612615837 0.603492085
##  [475] 0.388681188 0.430152996 0.038067294 0.356199570 0.278862726 0.106307965
##  [481] 0.071976428 0.602552379 0.017342851 0.321733781 0.036999141 0.432424210
##  [487] 0.993441041 0.336491644 0.914215389 0.640881211 0.362241779 0.545656755
##  [493] 0.681311932 0.583677237 0.909099794 0.516495242 0.116705100 0.488129606
##  [499] 0.868249684 0.248284875 0.570585482 0.511845052 0.285951967 0.729344011
##  [505] 0.901137504 0.000557920 0.707560995 0.925472652 0.220843169 0.975761418
##  [511] 0.242616646 0.871369991 0.276126785 0.401788290 0.476604429 0.559957637
##  [517] 0.422987592 0.018525196 0.394980394 0.233093024 0.681546389 0.566501446
##  [523] 0.665244594 0.596092809 0.697669435 0.229742539 0.513950478 0.702553303
##  [529] 0.409191352 0.325716134 0.338225112 0.451121780 0.108597222 0.540432131
##  [535] 0.380572215 0.347243363 0.011030372 0.479020505 0.231168697 0.043255075
##  [541] 0.086603209 0.208844252 0.880450918 0.459875378 0.009052018 0.318440288
##  [547] 0.918351311 0.354732971 0.534783691 0.005423849 0.977051672 0.837049824
##  [553] 0.788430573 0.132162192 0.595019478 0.507757391 0.451361051 0.169342174
##  [559] 0.868692440 0.208954238 0.264202166 0.188221312 0.492532343 0.088715851
##  [565] 0.913754155 0.221645036 0.735939280 0.239311021 0.896030696 0.516189344
##  [571] 0.861228754 0.295216704 0.507588912 0.372699154 0.344671142 0.123126278
##  [577] 0.276623777 0.793539987 0.038714385 0.350914455 0.114155468 0.481040069
##  [583] 0.299742800 0.786132715 0.001415051 0.262048169 0.076295996 0.212786599
##  [589] 0.974177575 0.747058090 0.557730756 0.902778166 0.124361099 0.591498703
##  [595] 0.652919753 0.361014639 0.752308992 0.971267930 0.196555710 0.563619864
##  [601] 0.333673553 0.793589623 0.604181946 0.765680104 0.543966186 0.387072244
##  [607] 0.002093269 0.172393528 0.067080317 0.942650234 0.361736791 0.242054048
##  [613] 0.587775155 0.603762726 0.971700365 0.483816689 0.355129978 0.647934587
##  [619] 0.487491339 0.666441202 0.297654229 0.811352168 0.759943687 0.417523694
##  [625] 0.044581891 0.753463814 0.294409976 0.863560068 0.935147601 0.172241569
##  [631] 0.329044106 0.839560854 0.762538460 0.513916269 0.043944414 0.579341973
##  [637] 0.328648613 0.531050866 0.906264819 0.233127027 0.111580558 0.924797306
##  [643] 0.309171075 0.645182338 0.083649923 0.228572697 0.320010037 0.230017999
##  [649] 0.540693832 0.071079178 0.721127761 0.469442226 0.390237165 0.307884346
##  [655] 0.782034168 0.896463095 0.898348060 0.424461185 0.138773269 0.703849268
##  [661] 0.923525887 0.585757955 0.011178207 0.338611412 0.828414502 0.506369022
##  [667] 0.372354152 0.404335340 0.651312959 0.065772202 0.052391739 0.386895077
##  [673] 0.105687540 0.200878629 0.515991641 0.194280462 0.035381129 0.917966434
##  [679] 0.231686569 0.079947189 0.257600055 0.846293016 0.046314784 0.318261739
##  [685] 0.027216852 0.240360899 0.458946846 0.938239746 0.042775312 0.026723776
##  [691] 0.613484560 0.774688394 0.329535539 0.189203619 0.452981016 0.675076799
##  [697] 0.662276481 0.594744980 0.934839521 0.977526725 0.837200684 0.018219642
##  [703] 0.006506717 0.774858738 0.991879006 0.426804793 0.856620345 0.963621758
##  [709] 0.673768350 0.607730325 0.253625580 0.581567982 0.971505394 0.932487713
##  [715] 0.050654433 0.568785165 0.738718179 0.240162621 0.751692967 0.824106595
##  [721] 0.970933449 0.981502876 0.288876986 0.500187206 0.682276764 0.347531113
##  [727] 0.555550829 0.237704490 0.380814527 0.595206258 0.924015860 0.272173979
##  [733] 0.340638632 0.456959398 0.770380412 0.734099231 0.231153664 0.276093125
##  [739] 0.918652134 0.214699738 0.900383952 0.995056809 0.672247454 0.909862651
##  [745] 0.016555960 0.840608323 0.555152412 0.960144894 0.678735120 0.196230914
##  [751] 0.777985330 0.547052542 0.366824923 0.445770897 0.138368023 0.503254032
##  [757] 0.518376202 0.468663729 0.919507452 0.786879412 0.120892389 0.711746722
##  [763] 0.960342418 0.998565243 0.050689301 0.183408892 0.906318237 0.754017317
##  [769] 0.340631921 0.669456489 0.279474546 0.390686080 0.483149340 0.179495460
##  [775] 0.222594554 0.211950488 0.638749522 0.696952224 0.639736858 0.908456281
##  [781] 0.548577101 0.380508375 0.237923553 0.627363317 0.370429684 0.193802330
##  [787] 0.834915933 0.504660706 0.728503406 0.513540601 0.234101456 0.632927269
##  [793] 0.906606933 0.896055818 0.787913130 0.721612185 0.134614121 0.535938527
##  [799] 0.948933443 0.072190938 0.550987639 0.828940071 0.298620529 0.988172984
##  [805] 0.686990600 0.461528485 0.152358692 0.844061207 0.499245745 0.365783756
##  [811] 0.210159262 0.673748599 0.033653814 0.261928061 0.156702066 0.796197572
##  [817] 0.770846095 0.857736923 0.726215209 0.020286011 0.648788913 0.325110177
##  [823] 0.685452182 0.029315814 0.044718813 0.690836923 0.946252326 0.611609022
##  [829] 0.994065697 0.747941711 0.390703205 0.834476176 0.095701382 0.222935472
##  [835] 0.480073389 0.207725384 0.209104967 0.396897682 0.900432388 0.200796870
##  [841] 0.623668703 0.620872608 0.578629836 0.927432726 0.295016505 0.885068537
##  [847] 0.728845167 0.754788321 0.592301258 0.539497072 0.215022543 0.464429275
##  [853] 0.176491572 0.201940084 0.466262370 0.209539742 0.025100662 0.186148380
##  [859] 0.014801851 0.679684240 0.651850422 0.471647774 0.178339166 0.495653799
##  [865] 0.548635005 0.297489181 0.190358497 0.168708485 0.446396953 0.178432100
##  [871] 0.681112171 0.838335896 0.859562863 0.887641122 0.414569945 0.510101967
##  [877] 0.685089255 0.667782580 0.981285806 0.646770385 0.294694867 0.641053145
##  [883] 0.508667369 0.815904862 0.295809238 0.537513837 0.852752432 0.124067596
##  [889] 0.863630113 0.237305745 0.631868464 0.144730473 0.838640692 0.534345287
##  [895] 0.169226069 0.717941987 0.548103620 0.465622414 0.704251861 0.316010155
##  [901] 0.396407094 0.579352281 0.265367753 0.426801071 0.740337830 0.799084757
##  [907] 0.657305081 0.542389390 0.747168762 0.421098417 0.297075837 0.838274989
##  [913] 0.168672224 0.448053660 0.020091488 0.452379323 0.118534828 0.778521840
##  [919] 0.604142830 0.180973517 0.851345440 0.728329386 0.474256140 0.004504524
##  [925] 0.768377739 0.539283557 0.520482832 0.560362896 0.705447193 0.998462903
##  [931] 0.464754057 0.446963488 0.066103426 0.924602964 0.443387946 0.200154661
##  [937] 0.150891229 0.278223425 0.986150397 0.685962652 0.769033866 0.155613129
##  [943] 0.024345771 0.974773911 0.399254150 0.618269715 0.296279720 0.554979204
##  [949] 0.439172166 0.259204851 0.032868326 0.074142882 0.447413664 0.352901525
##  [955] 0.161007288 0.275010670 0.216153468 0.118997995 0.704043146 0.538776144
##  [961] 0.181928352 0.622418319 0.282864565 0.694011896 0.700964784 0.688376266
##  [967] 0.858978819 0.379856516 0.711322863 0.042340424 0.611923265 0.748253512
##  [973] 0.171348887 0.730474901 0.278427380 0.038402506 0.552633558 0.841027116
##  [979] 0.695777023 0.126915451 0.298932941 0.727024411 0.480911083 0.078187770
##  [985] 0.278331887 0.650340814 0.328073525 0.517493737 0.520807039 0.138009547
##  [991] 0.830309740 0.534225978 0.702112238 0.177036250 0.114622413 0.557193185
##  [997] 0.314118815 0.507391617 0.681005740 0.226018894
runif(10, min=25, max=50)
##  [1] 47.19521 34.27023 46.40324 47.50572 41.95845 47.79780 38.52897 39.64524
##  [9] 44.30845 48.05846
sample(myran, 25, replace=TRUE)
##  [1] 0.378234537 0.340886954 0.542389390 0.401788290 0.095701382 0.924015860
##  [7] 0.835517573 0.233406646 0.377996029 0.679667232 0.796197572 0.264202166
## [13] 0.559957637 0.776891558 0.006506717 0.002821170 0.965147684 0.821206778
## [19] 0.733308276 0.539497072 0.018525196 0.150103385 0.388681188 0.995087542
## [25] 0.667782580
sample(1:11, 10, replace=FALSE)
##  [1] 10  9  8  1  4  3 11  2  6  5
rnorm(10)
##  [1]  0.15660613 -0.52189542  0.43109406 -0.61196006  0.01211578 -0.44994203
##  [7]  0.48800999  0.23307444 -0.65710333 -0.18143298
rnorm(10, mean=5, sd=15)
##  [1]  -7.460934   3.115424  12.632224  15.045717  15.237991   4.921994
##  [7]  -9.361068 -15.660290 -20.158913  21.890764
set.seed(88899)
runif(10)
##  [1] 0.48098750 0.65454142 0.67515894 0.23419069 0.96723262 0.81835158
##  [7] 0.87185885 0.29308983 0.03261824 0.97078604
runif(10)
##  [1] 0.65245457 0.04737159 0.44235093 0.25562254 0.59711015 0.60203716
##  [7] 0.17897926 0.04146090 0.45153764 0.36984252
set.seed(88899)
runif(20)
##  [1] 0.48098750 0.65454142 0.67515894 0.23419069 0.96723262 0.81835158
##  [7] 0.87185885 0.29308983 0.03261824 0.97078604 0.65245457 0.04737159
## [13] 0.44235093 0.25562254 0.59711015 0.60203716 0.17897926 0.04146090
## [19] 0.45153764 0.36984252

Loops

# Print the odd numbers from 1 to 15, using `next` to skip the even ones.
for (i in seq_len(15)) {
  # A remainder of 0 on division by 2 means `i` is even: go to the next value.
  if (i %% 2 == 0) next
  print(paste(i, "is odd"))
}
## [1] "1 is odd"
## [1] "3 is odd"
## [1] "5 is odd"
## [1] "7 is odd"
## [1] "9 is odd"
## [1] "11 is odd"
## [1] "13 is odd"
## [1] "15 is odd"
# Print the even numbers from 1 to `imax` with a `while` loop.
imax <- 20
i <- 1
while (i <= imax) {
  # Explicit comparison instead of `!i %% 2`, which relies on `%%` binding
  # tighter than `!` and on coercing the remainder to a logical value.
  if (i %% 2 == 0) {
    print(paste(i, "is even"))
  }
  # Advance the counter; `<-` is the assignment operator used elsewhere here.
  i <- i + 1
}
## [1] "2 is even"
## [1] "4 is even"
## [1] "6 is even"
## [1] "8 is even"
## [1] "10 is even"
## [1] "12 is even"
## [1] "14 is even"
## [1] "16 is even"
## [1] "18 is even"
## [1] "20 is even"
# Label every integer from 1 to `imax` as odd or even.
imax <- 20
i <- 1
while (i <= imax) {
  # A non-zero remainder modulo 2 marks an odd number.
  print(paste(i, if (i %% 2 != 0) "is odd" else "is even"))
  i <- i + 1
}
## [1] "1 is odd"
## [1] "2 is even"
## [1] "3 is odd"
## [1] "4 is even"
## [1] "5 is odd"
## [1] "6 is even"
## [1] "7 is odd"
## [1] "8 is even"
## [1] "9 is odd"
## [1] "10 is even"
## [1] "11 is odd"
## [1] "12 is even"
## [1] "13 is odd"
## [1] "14 is even"
## [1] "15 is odd"
## [1] "16 is even"
## [1] "17 is odd"
## [1] "18 is even"
## [1] "19 is odd"
## [1] "20 is even"

Input: Reading ASCII data

getwd()
## [1] "/Users/kimwong/OneDrive - University of Pittsburgh/Documents/Kim F. Wong/CRC_Workshop/2021/Shawn_Brown_Lecture/Learning_R_with_RStudio_F2021"
list.files()
##  [1] "Basic_R.html"                  "Basic_R.nb.html"              
##  [3] "Basic_R.pdf"                   "Basic_R.Rmd"                  
##  [5] "CRC-On-Demand-1.png"           "CRC-On-Demand-2.png"          
##  [7] "CRC-On-Demand-3.png"           "example.out"                  
##  [9] "example.R"                     "Slide24.png"                  
## [11] "Slide25.png"                   "Slide26.png"                  
## [13] "Slide27.png"                   "Slide28.png"                  
## [15] "students-ascii-nocompress.rds" "students-ascii.rds"           
## [17] "students-bin.rds"              "students-hacked.csv"          
## [19] "students-hacked.Rdmpd"         "students-hacked.tab"          
## [21] "students.csv"                  "students.tab"

Read comma-separated data

grades <- read.csv("students.csv")
grades

Read tab-delimited data

grades_tab <- read.table("students.tab", header=TRUE, sep="\t")
grades_tab

Output: writing ASCII data

Let’s modify some data first

grades[2, ]
grades[2,2:7] <- c(100,100,100,100,100,100)
grades[2, ]
grades

Write data in csv format

write.csv(grades, "students-hacked.csv")

Write data in tab-separated format

write.table(grades, "students-hacked.tab", sep="\t")

List files in working directory

list.files()
##  [1] "Basic_R.html"                  "Basic_R.nb.html"              
##  [3] "Basic_R.pdf"                   "Basic_R.Rmd"                  
##  [5] "CRC-On-Demand-1.png"           "CRC-On-Demand-2.png"          
##  [7] "CRC-On-Demand-3.png"           "example.out"                  
##  [9] "example.R"                     "Slide24.png"                  
## [11] "Slide25.png"                   "Slide26.png"                  
## [13] "Slide27.png"                   "Slide28.png"                  
## [15] "students-ascii-nocompress.rds" "students-ascii.rds"           
## [17] "students-bin.rds"              "students-hacked.csv"          
## [19] "students-hacked.Rdmpd"         "students-hacked.tab"          
## [21] "students.csv"                  "students.tab"

Output: beyond ASCII – dump

dump("grades", "students-hacked.Rdmpd")
rm("grades")

##{r} ##grades ##

source("students-hacked.Rdmpd")
grades
dump(c("grades", "students"), "students-hacked.Rdmpd")
##system("cat students-hacked.Rdmpd")
file.show("students-hacked.Rdmpd")

Output: beyond ASCII – saveRDS

saveRDS(grades, "students-bin.rds")
##system("cat students-bin.rds")
file.show("students-bin.rds")
saveRDS(grades, "students-ascii.rds", ascii=TRUE)
##system("cat students-ascii.rds")
file.show("students-ascii.rds")
saveRDS(grades, "students-ascii-nocompress.rds", ascii=TRUE, compress=FALSE)
##system("cat students-ascii-nocompress.rds")
file.show("students-ascii-nocompress.rds")

Running R scripts and outputting to file

This outputs to the console

source("example.R", print.eval=TRUE)
##  [1] 6.998114 5.015698 5.895312 6.382036 7.016532 6.062369 5.931604 5.107082
##  [9] 5.856181 5.256121
## [1] 10  2  8  3 10
##  [1] "South Carolina" "Alaska"         "Arizona"        "Virginia"      
##  [5] "Missouri"       "Colorado"       "South Dakota"   "Massachusetts" 
##  [9] "New York"       "New Mexico"     "Washington"     "Oklahoma"      
## [13] "Texas"          "Montana"        "Delaware"       "Illinois"      
## [17] "Ohio"           "Nevada"         "California"     "Maryland"      
## [21] "Connecticut"    "Michigan"       "Louisiana"      "Rhode Island"  
## [25] "Georgia"        "Indiana"        "Wisconsin"      "Utah"          
## [29] "New Jersey"     "Florida"        "Vermont"        "West Virginia" 
## [33] "North Carolina" "Mississippi"    "Wyoming"        "Alabama"       
## [37] "North Dakota"   "Tennessee"      "Idaho"          "Nebraska"      
## [41] "Kansas"         "New Hampshire"  "Maine"          "Hawaii"        
## [45] "Minnesota"      "Pennsylvania"   "Kentucky"       "Arkansas"      
## [49] "Oregon"         "Iowa"

This outputs to a specified file

##knit does not like sink{r} ##sink("example.out") ##

source("example.R", print.eval=TRUE)
##  [1] 6.950530 6.553134 5.300737 6.407774 7.161772 6.821719 6.137789 7.019207
##  [9] 7.439193 6.946376
## [1]  6  5 10  6  9
##  [1] "Pennsylvania"   "Tennessee"      "Alaska"         "Florida"       
##  [5] "Ohio"           "Colorado"       "Arizona"        "West Virginia" 
##  [9] "Iowa"           "Texas"          "New York"       "Kansas"        
## [13] "New Mexico"     "Mississippi"    "Nebraska"       "Wisconsin"     
## [17] "Oklahoma"       "Illinois"       "Oregon"         "Michigan"      
## [21] "Arkansas"       "Rhode Island"   "Minnesota"      "New Hampshire" 
## [25] "South Dakota"   "Idaho"          "California"     "South Carolina"
## [29] "Massachusetts"  "New Jersey"     "Vermont"        "Wyoming"       
## [33] "Utah"           "Virginia"       "Delaware"       "Missouri"      
## [37] "Maine"          "North Dakota"   "Indiana"        "Alabama"       
## [41] "Hawaii"         "Georgia"        "Montana"        "Kentucky"      
## [45] "Connecticut"    "Louisiana"      "North Carolina" "Washington"    
## [49] "Nevada"         "Maryland"
# Confirm that example.out is present in the working directory. `list.files()`
# is used instead of shelling out to `ls | grep`, so the check does not depend
# on those commands being available, and the `.` is escaped so that it matches
# a literal dot rather than any character.
list.files(pattern = "example\\.out")
##system("cat example.out")
file.show("example.out")

Using RStudio Server on the CRC cluster

Parallelization Strategies in R

For the latest landscape overview for performing parallel computing within R, see

CRAN Task View: High-Performance and Parallel Computing with R

Here, we will only discuss the following R packages

  • foreach. Provides a new loop construct that can execute repeated tasks in parallel on multiple cores or multiple nodes of a cluster.

  • parallel. Direct support for “coarse-grained” parallel execution. Coarse-grained in the sense that large chunks of computation tasks can be farmed out to the cores simultaneously.

  • doMC. Provides a parallel backend for the %dopar% operator using the multicore functionality of the parallel package.

Example: foreach

library(foreach)
library(doParallel)
## Loading required package: iterators
## Loading required package: parallel
system.time( foreach(i=1:10000) %do% sum(tanh(1:i)) )
##    user  system elapsed 
##   1.496   0.338   1.834
registerDoParallel()
getDoParWorkers()
## [1] 12
system.time( foreach(i=1:10000) %dopar% sum(tanh(1:i)) )
##    user  system elapsed 
##   1.648   1.561   1.098
registerDoSEQ(); getDoParWorkers()
## [1] 1
system.time( foreach(i=1:10000) %dopar% sum(tanh(1:i)) )
##    user  system elapsed 
##   1.418   0.334   1.752
registerDoParallel(cores=1); getDoParWorkers()
## [1] 1
system.time( foreach(i=1:10000) %dopar% sum(tanh(1:i)) )
##    user  system elapsed 
##   1.295   0.487   1.784
registerDoParallel(cores=16); getDoParWorkers()
## [1] 16
system.time( foreach(i=1:10000) %dopar% sum(tanh(1:i)) )
##    user  system elapsed 
##   1.526   1.123   1.022
system.time( foreach(i=1:10000) %dopar% sum(tanh(1:i)) )
##    user  system elapsed 
##   1.474   1.037   0.993
registerDoParallel(cores=4)
system.time( foreach(i=1:10000) %dopar% sum(tanh(1:i)) )
##    user  system elapsed 
##   1.336   0.676   1.149

Example: foreach randomForest

library(randomForest)
## randomForest 4.6-14
## Type rfNews() to see new features/changes/bug fixes.
x <- matrix(runif(500), 100)
y <- gl(2, 50)
rf <- foreach(ntree=rep(250, 4), .combine=combine) %do% randomForest(x, y, ntree=ntree)
rf
## 
## Call:
##  randomForest(x = x, y = y, ntree = ntree) 
##                Type of random forest: classification
##                      Number of trees: 1000
## No. of variables tried at each split: 2
rf <- foreach(ntree=rep(250, 4), .combine=combine, .packages='randomForest') %dopar% randomForest(x, y, ntree=ntree)
rf
## 
## Call:
##  randomForest(x = x, y = y, ntree = ntree) 
##                Type of random forest: classification
##                      Number of trees: 1000
## No. of variables tried at each split: 2

Example: Multicore processing

library(doMC)
registerDoMC(cores=4)
library(rbenchmark)
# Draw an N x N matrix of normal deviates (mean 0, standard deviation `sigma`)
# and return the absolute value (modulus) of the first eigenvalue that
# `eigen()` reports for it.
max.eig <- function(N, sigma) {
  entries <- rnorm(N^2, sd = sigma)
  spectrum <- eigen(matrix(entries, nrow = N))$values
  abs(spectrum)[[1]]
}
benchmark(foreach(n = 1:50) %do% max.eig(n, 1),
          foreach(n = 1:50) %dopar% max.eig(n, 1)
)

Example: R on a cluster

library(doSNOW)
## Loading required package: snow
## 
## Attaching package: 'snow'
## The following objects are masked from 'package:parallel':
## 
##     clusterApply, clusterApplyLB, clusterCall, clusterEvalQ,
##     clusterExport, clusterMap, clusterSplit, makeCluster, parApply,
##     parCapply, parLapply, parRapply, parSapply, splitIndices,
##     stopCluster
# Benchmark sequential (%do%) against parallel (%dopar%) execution on a
# 4-worker "SOCK" cluster, then shut the workers down.
cluster <- makeCluster(4, type = "SOCK")
registerDoSNOW(cluster)
benchmark(foreach(n = 1:50) %do% max.eig(n, 1),
          foreach(n = 1:50) %dopar% max.eig(n, 1)
)
stopCluster(cluster)
# Same comparison with 20 workers.
cluster <- makeCluster(20, type = "SOCK")
registerDoSNOW(cluster)
benchmark(foreach(n = 1:50) %do% max.eig(n, 1),
          foreach(n = 1:50) %dopar% max.eig(n, 1)
)
stopCluster(cluster)